Economics

Correlation Heatmap

First we have some data wrangle to do. Here, we choose a selection of 35 variables to work with. We also filter for the Connecticut governing regions rather than counties. Finally, we arrange the variables in sensible order so they appear in similar blocks on the correlation plot.

Code
pacman::p_load(
  dplyr,
  tidyr,
  tibble,
  stringr,
  purrr,
  tidyr
)

conflicted::conflicts_prefer(
  dplyr::filter(),
  dplyr::select(),
  .quiet = TRUE
)
source('dev/data_pipeline_functions.R')
source('dev/filter_fips.R')
metrics <- readRDS('data/sm_data.rds')[['metrics']]
metadata <- readRDS('data/sm_data.rds')[['metadata']]

# Use metadata to get help filter by dimension
econ_meta <- metadata %>% 
  filter(dimension == 'economics')

# Filter to economics dimension
econ_metrics <- metrics %>% 
  filter(variable_name %in% econ_meta$variable_name)

# Filter to latest year and new (post-2024) counties
# Also remove NAICS variables to leave us with an approachable number
# And pivot wider so it is easier to get correlations
econ_metrics_latest <- econ_metrics %>%
  filter_fips(scope = 'new') %>% 
  get_latest_year() %>% 
  filter(str_detect(variable_name, 'Naics|^lq|^avgEmpLvl', negate = TRUE))

# Pivot wider for easier correlations below
econ_metrics_latest <- econ_metrics_latest %>% 
  select(fips, variable_name, value) %>% 
  unique() %>% 
  pivot_wider(
    names_from = 'variable_name',
    values_from = 'value'
  ) %>% 
  unnest(!fips)

# Arrange in some reasonable order
econ_metrics_latest <- econ_metrics_latest %>% 
  select(
    matches('Code_|metro'),
    matches('employ|abor|Worker'), # employment
    matches('Sales'),
    matches('Earn|Income'),
    everything(),
    -fips,
    -matches('expHiredLaborPercOpExp')
  )

Now we can build an interactive correlation plot. We are calculating correlations based on complete pairwise observations to account for missing data and the changes to Census Bureau reporting for Connecticut.

This is a reminder to do a deeper dive on missing data once more of the data have been wrangled. There is a substantial amount given the varying scales at which data are collected, not to mention the issues at the county level with Connecticut.

Code
pacman::p_load(
  dplyr,
  ggplot2,
  plotly,
  reshape,
  Hmisc,
  viridisLite
)

# Make a correlation matrix using all the selected variables
cor <- econ_metrics_latest %>% 
  as.matrix() %>% 
  rcorr()

# Melt correlation values and rename columns
cor_r <- melt(cor$r) %>% 
  setNames(c('var_1', 'var_2', 'value'))

# Save p values
cor_p <- melt(cor$P)
p.value <- cor_p$value

# Make heatmap with custom text aesthetic for tooltip
plot <- cor_r %>% 
  ggplot(aes(var_1, var_2, fill = value, text = paste0(
  'Var 1: ', var_1, '\n',
  'Var 2: ', var_2, '\n',
  'Correlation: ', format(round(value, 3), nsmall = 3), '\n',
  'P-Value: ', format(round(p.value, 3), nsmall = 3)
))) + 
  geom_tile() + 
  scale_fill_viridis_c() + 
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  labs(
    x = NULL,
    y = NULL,
    fill = 'Correlation'
  )

# Convert to interactive plotly figure with text tooltip
ggplotly(
  plot, 
  tooltip = 'text',
  width = 1000,
  height = 800
)

Interactive Correlation Plot

Cladogram

Show Wiltshire framework - what have we covered, what have we added.

Code
pacman::p_load(
  ggtree,
  dplyr,
  ape,
  data.tree,
  viridisLite,
  stringr
)

## Load data and add an origin level
dat <- readRDS('data/tree_dat.rds') %>% 
  filter(Dimension == 'Economics') %>% 
  mutate(Framework = 'Sustainability') %>% 
  select(Framework, Dimension:Indicator) %>% 
  mutate(across(
    everything(), 
    ~ str_trim(str_replace_all(., ';|%|/|\\.|\"|,|\\(|\\)', '_'))
  ))

dat$pathString <- paste(
  dat$Framework,
  dat$Dimension,
  dat$Index,
  dat$Indicator,
  sep = '/'
)
tree <- as.Node(dat)

# Convert the data.tree structure to Newick format
tree_newick <- ToNewick(tree)

# Read the Newick tree into ape
phylo_tree <- read.tree(text = tree_newick)

# Make all edge lengths 1
phylo_tree$edge.length <- rep(1, length(phylo_tree$edge.length))

# Add a space to end of node labels so it isn't cut off
phylo_tree$node.label <- paste0(phylo_tree$node.label, ' ')

# Plot it
plot(
  phylo_tree, 
  type = 'c',
  cex = 0.75,
  edge.width = 2,
  show.tip.label = TRUE,
  label.offset = 0,
  no.margin = TRUE,
  tip.color = 'black',
  edge.color = viridis(181),
  x.lim = c(-0.1, 5)
)

nodelabels(
  phylo_tree$node.label,
  cex = 0.8,
  bg = 'white'
)

Cladogram of Sustainability Metrics framework

Metadata Table

Using the table:

  • Click column headers to sort
  • Global search at top right, column search in each header
  • Change page length and page through results at the bottom
  • Use the download button to download a .csv file of the filtered table
Code
pacman::p_load(
  dplyr,
  reactable,
  stringr,
  htmltools
)

# Load full metadata table
metadata_all <- readRDS('data/sm_data.rds')[['metadata']]

# Pick out variables to display
metadata <- metadata_all %>% 
  select(
    metric,
    'Variable Name' = variable_name,
    definition,
    dimension,
    index,
    indicator,
    units,
    'Year' = latest_year, # Renaming latest year as year, not including og year
    source,
    scope,
    resolution,
    url
) %>% 
  setNames(c(str_to_title(names(.))))


###
htmltools::browsable(
  tagList(
    
    tags$div(
      style = "display: flex; gap: 16px; margin-bottom: 20px; justify-content: center;",
      
      tags$button(
        class = "btn btn-primary",
        style = "display: flex; align-items: center; gap: 8px; padding: 8px 12px;",
        tagList(fontawesome::fa("download"), "Show/hide more columns"),
        onclick = "Reactable.setHiddenColumns('metadata_table', prevColumns => {
          return prevColumns.length === 0 ? ['Definition', 'Scope', 'Resolution', 'Url'] : []
        })"
      ),
      
      tags$button(
        class = "btn btn-primary",
        style = "display: flex; align-items: center; gap: 8px; padding: 8px 12px;",
        tagList(fontawesome::fa("download"), "Download as CSV"),
        onclick = "Reactable.downloadDataCSV('metadata_table', 'sustainability_metadata.csv')"
      )
    ),
    
    reactable(
      metadata,
      sortable = TRUE,
      resizable = TRUE,
      filterable = TRUE,
      searchable = TRUE,
      pagination = TRUE,
      bordered = TRUE,
      wrap = TRUE,
      rownames = FALSE,
      onClick = 'select',
      striped = TRUE,
      pageSizeOptions = c(5, 10, 25, 50, 100),
      defaultPageSize = 5,
      showPageSizeOptions = TRUE,
      highlight = TRUE,
      style = list(fontSize = "14px"),
      compact = TRUE,
      columns = list(
        # Dimension = colDef(
          # minWidth = 75,
          # sticky = 'left'
        # ),
        # Index = colDef(
          # minWidth = 75,
          # sticky = 'left'
        # ),
        # Indicator = colDef(
          # minWidth = 100,
          # sticky = 'left'
        # ),
        Metric = colDef(
          minWidth = 200,
          sticky = 'left'
        ),
        'Variable Name' = colDef(
          minWidth = 150
        ),
        Definition = colDef(
          minWidth = 250
        ),
        # Units = colDef(minWidth = 50),
        # Year = colDef(minWidth = 75),
        'Latest Year' = colDef(minWidth = 75),
        Source = colDef(minWidth = 250),
        Scope = colDef(show = FALSE),
        Resolution = colDef(show = FALSE),
        Url = colDef(
          minWidth = 300,
          show = FALSE
        )
      ),
      defaultColDef = colDef(minWidth = 100),
      elementId = "metadata_table",
      details = function(index) {
        div(
          style = "padding: 15px; border: 1px solid #ddd; margin: 10px 0;
             background-color: #E0EEEE; border-radius: 10px; border-color: black;
             box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);",
          
          tags$h4(
            strong("Details"), 
          ),
          tags$p(
            strong('Metric Name: '), 
            as.character(metadata_all[index, 'metric']),
          ),
          tags$p(
            strong('Variable Name: '), 
            as.character(metadata_all[index, 'variable_name']),
          ),
          tags$p(
            strong('Definition: '), 
            as.character(metadata_all[index, 'definition']),
          ),
          tags$p(
            strong('Source: '), 
            as.character(metadata_all[index, 'source'])
          ),
          tags$p(
            strong('Latest Year: '), 
            as.character(metadata_all[index, 'latest_year'])
          ),
          tags$p(
            strong('All Years (cleaned, wrangled, and included): '), 
            as.character(metadata_all[index, 'year'])
          ),
          tags$p(
            strong('Updates: '), 
            str_to_title(as.character(metadata_all[index, 'updates']))
          ),
          tags$p(
            strong('URL: '), 
            tags$a(
              href = as.character(metadata_all[index, 'url']),
              target = '_blank',
              as.character(metadata_all[index, 'url'])
            )
          )
        )
      }
    )
  )
)

Data Table

Code
pacman::p_load(
  dplyr,
  reactable,
  stringr,
  htmltools
)

# Load metrics and metadata
metadata_all <- readRDS('data/sm_data.rds')[['metadata']]
metrics <- readRDS('data/sm_data.rds')[['metrics']]
fips_key <- readRDS('data/sm_data.rds')[['fips_key']]

# Value formatting function based on units
source('dev/format_values.R')

# Filter to economics metrics, join with metadata and county fips codes
econ_metrics <- metrics %>% 
  left_join(metadata_all, by = join_by('variable_name')) %>% 
  filter(dimension == 'economics') %>% 
  left_join(fips_key, by = join_by('fips')) %>% 
  mutate(county_name = ifelse(is.na(county_name), state_name, county_name)) %>% 
  format_values()

# Join relevant metadata to metrics table
econ_metrics <- metrics %>% 
  left_join(metadata_all, by = join_by('variable_name')) %>% 
  filter(dimension == 'economics') %>% 
  left_join(fips_key, by = join_by('fips')) %>% 
  mutate(county_name = ifelse(is.na(county_name), state_name, county_name)) %>% 
  # mutate(
  #   # Clean up value formatting
  #   units = case_when(
  #     units %in% c('count', 'acres', 'usd') & as.numeric(value) > 1e6 ~ paste(units, '(x1000)'),
  #     .default = units
  #   ),
  #   value = case_when(
  #     str_detect(units, '(x1000)') ~ as.character(round(as.numeric(value) / 1000)),
  #     str_detect(units, 'count|acres|usd') ~ format(round(as.numeric(value), 1), big.mark = ','),
  #     str_detect(units, 'usd') ~ paste0('$', value),
  #     units == 'binary' ~ ifelse(value == 1, 'True', 'False'),
  #     units == 'age' ~ paste(value, 'years'),
  #     units == 'percentage' ~ paste('%', value),
  #     .default = value
  #   )
  # ) %>%
  format_values() %>% 
  select(
    metric,
    'Variable Name' = variable_name,
    definition,
    year = year.x,
    Area = county_name,
    units,
    value
  ) %>% 
  setNames(c(str_to_title(names(.)))) %>% 
  filter(!is.na(Value))

get_str(econ_metrics)
tibble [132,504 × 7] (S3: tbl_df/tbl/data.frame)
 $ Metric       : chr [1:132504] "Civilian Labor Force" "Number Employed" "Nu"..
 $ Variable Name: chr [1:132504] "civLaborForce" "employed" "unemployed" "une"..
 $ Definition   : chr [1:132504] "Number of civilians age 16 or older who are"..
 $ Year         : chr [1:132504] "2000" "2000" "2000" "2000" "2001" "2001" "2"..
 $ Area         : chr [1:132504] "US" "US" "US" "US" "US" "US" "US" "US" "US""..
 $ Units        : chr [1:132504] "count x1000" "count x1000" "count x1000" "p"..
 $ Value        : chr [1:132504] "142602" "136905" "5697" "4%" "143787" "1369"..
Code
econ_metrics$Units %>% unique
 [1] "count x1000" "percentage"  "usd"         "count"       "categorical"
 [6] "binary"      "index"       "usd x1000"   "ratio"       "acres"      
[11] "acres x1000"
Code
## Reactable table
htmltools::browsable(
  tagList(
    
    tags$div(
      style = "display: flex; gap: 16px; margin-bottom: 20px; justify-content: center;",
      tags$button(
        class = "btn btn-primary",
        style = "display: flex; align-items: center; gap: 8px; padding: 8px 12px;",
        tagList(fontawesome::fa("download"), "Download as CSV"),
        onclick = "Reactable.downloadDataCSV('metrics_table', 'sustainability_metrics.csv')"
      )
    ),
    
    reactable(
      econ_metrics,
      sortable = TRUE,
      resizable = TRUE,
      filterable = TRUE,
      searchable = TRUE,
      pagination = TRUE,
      bordered = TRUE,
      wrap = TRUE,
      rownames = FALSE,
      onClick = 'select',
      striped = TRUE,
      pageSizeOptions = c(5, 10, 25, 50, 100),
      defaultPageSize = 5,
      showPageSizeOptions = TRUE,
      highlight = TRUE,
      style = list(fontSize = "14px"),
      compact = TRUE,
      columns = list(
        Metric = colDef(
          minWidth = 125,
          sticky = 'left'
        ),
        'Variable Name' = colDef(
          minWidth = 125
        ),
        Definition = colDef(
          minWidth = 250
        ),
        Units = colDef(minWidth = 100),
        'Year' = colDef(minWidth = 100)
      ),
      defaultColDef = colDef(minWidth = 100),
      elementId = "metrics_table"
    )
  )
)
Back to top